library(ggplot2)
library(stringr)
library(DT)
library(plyr)
library(dplyr)
library(biomaRt)
library(Biobase)
library(reshape2)
library(formattable)
library(VennDiagram)
library(hypeR)
library(xlsx)
PATH <- getwd()
The summary statistics and respective plots along with eSet creation is in 00_summstats_eset.{Rmd, html}, imputation with random forest is in 01_pml_imp_summary.{Rmd, html}, DimRed analysis is in 02_dimred_hvg.{Rmd, html}. This file contains differential expression analysis using DESeq2 along with significant marker info. The analysis is performed with age, sex and smoking status as covariates with histopathological group.
eset <- readRDS(file.path(PATH, "data/2021_08_20_eset_imputed_updated.RDS"))
eSet_wo_infl <- eset
table(eSet_wo_infl$Class)
##
## Cancer Control Dysplasia HkNR
## 9 18 22 17
cpm_eset <- eSet_wo_infl
exprs(cpm_eset) <- apply(exprs(cpm_eset), 2, function(x) {x/(sum(x)/1000000)})
print(dim(cpm_eset))
## Features Samples
## 21510 66
eSet_wo_infl$Class <- recode(eSet_wo_infl$Class, "Cancer"="OSCC")
eSet_wo_infl$Class <- factor(eSet_wo_infl$Class, levels = c("Control", "HkNR", "Dysplasia", "OSCC"))
Reference level = Control
cancer.vs.control <- eSet_wo_infl[, which(pData(eSet_wo_infl)$Class %in% c('Control', 'OSCC'))]
colData <- data.frame(condition=cancer.vs.control$Class)
colData$sex <- cancer.vs.control$Sex
colData$smoke <- cancer.vs.control$imputed_smoking_label
#make DESeq2 expected format for diffex analysis
dds <- DESeq2::DESeqDataSetFromMatrix(countData=exprs(cancer.vs.control),
colData=colData,
design= formula(~sex+smoke+condition))
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## factor levels were dropped which had no samples
dds$condition <- relevel(dds$condition, ref = "Control")
#perform diffex
dds_results <- DESeq2::DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 597 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
results_cancer.vs.control<- DESeq2::results(dds_results)
summary(results_cancer.vs.control)
## [1] "DESeqResults object of length 6 with 2 metadata columns"
results_cancer.vs.control <- results_cancer.vs.control[!is.na(results_cancer.vs.control$padj),]
results_cancer.vs.control$genes <- rownames(results_cancer.vs.control)
print(c("fdr<=0.05"=sum(results_cancer.vs.control$padj<=.05),
"fdr<=0.01"=sum(results_cancer.vs.control$padj<=.01),
"fdr<=0.001"=sum(results_cancer.vs.control$padj<=.001)))
## fdr<=0.05 fdr<=0.01 fdr<=0.001
## 4034 1953 751
fdr <- 0.05
max_fc <- 1.5
min_fc <- 1.5
cancer.vs.control_up <- rownames(results_cancer.vs.control[which((results_cancer.vs.control$padj <= fdr) & (results_cancer.vs.control$log2FoldChange > max_fc)),])
cancer.vs.control_down <- rownames(results_cancer.vs.control[which((results_cancer.vs.control$padj <= fdr) & (results_cancer.vs.control$log2FoldChange < -min_fc)),])
c("cancer.vs.control_up"= length(cancer.vs.control_up),
"cancer.vs.control_down"= length(cancer.vs.control_down))
## cancer.vs.control_up cancer.vs.control_down
## 402 654
cancer.vs.control_marker_list <- list(cancer.vs.control_up, cancer.vs.control_down)
DT::datatable(data.frame(results_cancer.vs.control[results_cancer.vs.control$padj<=.05 & results_cancer.vs.control$log2FoldChange >= 1.5, ]), caption = "Cancer.vs.Control-Upregulated")
DT::datatable(data.frame(results_cancer.vs.control[results_cancer.vs.control$padj<=.05 & results_cancer.vs.control$log2FoldChange <= -1.5, ]), caption = "Cancer.vs.Control-Downregulated")
#function for hclust with optimal leaf ordering from S. Monti / D. Gusenleitner
hcopt <- function(d, HC=NULL, method = "ward", members = NULL){
require("cba")
if ( is.null(HC) ) {
HC <- hclust(d,method=method,members=members)
}
#optimal leaf ordering
ORD <- order.optimal(d,merge=HC$merge)
HC$merge <- ORD$merge
HC$order <- ORD$order
HC
}
Significant diffex genes padj <= 0.05 and logfc +/- 1.5 with hcopt()
topgenes <- unlist(cancer.vs.control_marker_list)
heatdata <-exprs(cancer.vs.control)[topgenes,]
heatdata <- t(scale(t(heatdata)))
heatmapkey <- paste("Scaled", "counts", sep = "\n")
condition <- c('Class', 'imputed_smoking_label')
hc.row <- hcopt(as.dist(1-cor(t(heatdata))),method="ward.D")
## Loading required package: cba
## Loading required package: proxy
##
## Attaching package: 'proxy'
## The following objects are masked from 'package:stats':
##
## as.dist, dist
## The following object is masked from 'package:base':
##
## as.matrix
annot_col <- pData(cancer.vs.control)[,condition, drop=FALSE]
pheatmap::pheatmap(heatdata, cluster_rows = hc.row, show_rownames = FALSE, color = colorRampPalette(c("blue","white","red"))(100), annotation_col = annot_col)
umap_results <- umap::umap(t(heatdata), config = umap::umap.defaults )
dtp <- data.frame(umap_results$layout)
dtp$Sample <- colnames(heatdata)
dtp$Class <- pData(cancer.vs.control)$Class
dtp$Type <- pData(cancer.vs.control)$Type
plotly::ggplotly(ggplot(data = dtp, aes_string(x=dtp$X1, y=dtp$X2, label= "Sample", col = "Class", shape="Type")) +
ggplot2::geom_point()+ ggplot2::labs(x = 'UMAP1', y = 'UMAP2'))
Reference level = Control
dys.vs.control <- eSet_wo_infl[, which(pData(eSet_wo_infl)$Class %in% c('Control', 'Dysplasia'))]
colData <- data.frame(condition=dys.vs.control$Class)
colData$sex <- dys.vs.control$Sex
colData$smoke <- dys.vs.control$imputed_smoking_label
#make DESeq2 expected format for diffex analysis
dds <- DESeq2::DESeqDataSetFromMatrix(countData=exprs(dys.vs.control),
colData=colData,
design = ~sex+smoke+condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## factor levels were dropped which had no samples
dds$condition <- relevel(dds$condition, ref = 'Control')
#perform diffex
dds_results <- DESeq2::DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 311 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
#get the results in a nice tabular format
results_dys.vs.control<- DESeq2::results(dds_results)
results_dys.vs.control <- results_dys.vs.control[order(results_dys.vs.control$padj),]
#DT::datatable(as.data.frame(results_other.vs.control))
results_dys.vs.control <- results_dys.vs.control[!is.na(results_dys.vs.control$padj),]
c("fdr<=0.05"=sum(results_dys.vs.control$padj<=.05),
"fdr<=0.01"=sum(results_dys.vs.control$padj<=.01),
"fdr<=0.001"=sum(results_dys.vs.control$padj<=.001))
## fdr<=0.05 fdr<=0.01 fdr<=0.001
## 3159 1353 425
dys.vs.control_up <- rownames(results_dys.vs.control[which((results_dys.vs.control$padj <= fdr)&(results_dys.vs.control$log2FoldChange> +max_fc)),])
dys.vs.control_down <- rownames(results_dys.vs.control[which((results_dys.vs.control$padj <= fdr)&(results_dys.vs.control$log2FoldChange < -min_fc)),])
c("dys.vs.control_up"= length(dys.vs.control_up),
"dys.vs.control_down"= length(dys.vs.control_down))
## dys.vs.control_up dys.vs.control_down
## 281 267
dys.vs.control_marker_list <- list(dys.vs.control_up, dys.vs.control_down)
DT::datatable(data.frame(results_dys.vs.control[results_dys.vs.control$padj<=.05 & results_dys.vs.control$log2FoldChange >= 1.5, ]), caption = "Dys.vs.Control-Upregulated")
DT::datatable(data.frame(results_dys.vs.control[results_dys.vs.control$padj<=.05 & results_dys.vs.control$log2FoldChange <= -1.5, ]), caption = "Dys.vs.Control-Downregulated")
topgenes <- unlist(dys.vs.control_marker_list)
heatdata <- log2(exprs(dys.vs.control)[topgenes,]+1)
heatdata <- t(scale(t(heatdata)))
heatmapkey <- paste("Scaled", "counts", sep = "\n")
condition <- 'Class'
hc.row <- hcopt(as.dist(1-cor(t(heatdata))),method="ward.D")
annot_col <- pData(dys.vs.control)[,condition, drop=FALSE]
pheatmap::pheatmap(heatdata, cluster_rows = hc.row, show_rownames = FALSE, color = colorRampPalette(c("blue","white","red"))(100), annotation_col = annot_col)
Reference level = Control
hknr.vs.control <- eSet_wo_infl[, which(pData(eSet_wo_infl)$Class %in% c('Control', 'HkNR'))]
colData <- data.frame(condition=hknr.vs.control$Class)
colData$sex <- hknr.vs.control$Sex
colData$smoke <- hknr.vs.control$imputed_smoking_label
#make DESeq2 expected format for diffex analysis
dds <- DESeq2::DESeqDataSetFromMatrix(countData=exprs(hknr.vs.control),
colData=colData,
design = ~sex+smoke+condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## factor levels were dropped which had no samples
dds$condition <- relevel(dds$condition, ref = 'Control')
#perform diffex
dds_results <- DESeq2::DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 865 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
#get the results in a nice tabular format
results_hknr.vs.control<- DESeq2::results(dds_results)
results_hknr.vs.control <- results_hknr.vs.control[order(results_hknr.vs.control$padj),]
#DT::datatable(as.data.frame(results_other.vs.control))
results_hknr.vs.control <- results_hknr.vs.control[!is.na(results_hknr.vs.control$padj),]
c("fdr<=0.05"=sum(results_hknr.vs.control$padj<=.05),
"fdr<=0.01"=sum(results_hknr.vs.control$padj<=.01),
"fdr<=0.001"=sum(results_hknr.vs.control$padj<=.001))
## fdr<=0.05 fdr<=0.01 fdr<=0.001
## 267 88 37
hknr.vs.control_up <- rownames(results_hknr.vs.control[which((results_hknr.vs.control$padj <= fdr)&(results_hknr.vs.control$log2FoldChange> +max_fc)),])
hknr.vs.control_down <- rownames(results_hknr.vs.control[which((results_hknr.vs.control$padj <= fdr)&(results_hknr.vs.control$log2FoldChange < -min_fc)),])
hknr.vs.control_marker_list <- list(hknr.vs.control_up, hknr.vs.control_down)
DT::datatable(data.frame(results_hknr.vs.control[results_hknr.vs.control$padj<=.05 & results_hknr.vs.control$log2FoldChange >= 1.5, ]), caption = "HkNR.vs.Control-Upregulated")
DT::datatable(data.frame(results_hknr.vs.control[results_hknr.vs.control$padj<=.05 & results_hknr.vs.control$log2FoldChange <= -1.5, ]), caption = "HkNR.vs.Control-Downregulated")
topgenes <- unlist(hknr.vs.control_marker_list)
heatdata <- log2(exprs(hknr.vs.control)[topgenes,]+1)
heatdata <- t(scale(t(heatdata)))
heatmapkey <- paste("Scaled", "counts", sep = "\n")
condition <- 'Class'
hc.row <- hcopt(as.dist(1-cor(t(heatdata))),method="ward.D")
annot_col <- pData(hknr.vs.control)[,condition, drop=FALSE]
pheatmap::pheatmap(heatdata, cluster_rows = hc.row, show_rownames = FALSE, color = colorRampPalette(c("blue","white","red"))(100), annotation_col = annot_col)
Reference level = Dysplasia
dys.vs.cancer <- eSet_wo_infl[, which(pData(eSet_wo_infl)$Class %in% c('OSCC', 'Dysplasia'))]
colData <- data.frame(condition=dys.vs.cancer$Class)
colData$sex <- dys.vs.cancer$Sex
colData$smoke <- dys.vs.cancer$imputed_smoking_label
#make DESeq2 expected format for diffex analysis
dds <- DESeq2::DESeqDataSetFromMatrix(countData=exprs(dys.vs.cancer),
colData=colData,
design = ~sex+smoke+condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## factor levels were dropped which had no samples
dds$condition <- relevel(dds$condition, ref = 'Dysplasia')
#perform diffex
dds_results <- DESeq2::DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 97 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
#get the results in a nice tabular format
results_dys.vs.cancer<- DESeq2::results(dds_results)
results_dys.vs.cancer <- results_dys.vs.cancer[order(results_dys.vs.cancer$padj),]
#DT::datatable(as.data.frame(results_other.vs.control))
results_dys.vs.cancer <- results_dys.vs.cancer[!is.na(results_dys.vs.cancer$padj),]
c("fdr<=0.05"=sum(results_dys.vs.cancer$padj<=.05),
"fdr<=0.01"=sum(results_dys.vs.cancer$padj<=.01),
"fdr<=0.001"=sum(results_dys.vs.cancer$padj<=.001))
## fdr<=0.05 fdr<=0.01 fdr<=0.001
## 738 122 3
dys.vs.cancer_up <- rownames(results_dys.vs.cancer[which((results_dys.vs.cancer$padj <= fdr)&(results_dys.vs.cancer$log2FoldChange> +max_fc)),])
dys.vs.cancer_down <- rownames(results_dys.vs.cancer[which((results_dys.vs.cancer$padj <= fdr)&(results_dys.vs.cancer$log2FoldChange < -min_fc)),])
dys.vs.cancer_marker_list <- list(dys.vs.cancer_up, dys.vs.cancer_down)
DT::datatable(data.frame(results_dys.vs.cancer[results_dys.vs.cancer$padj<=.05 & results_dys.vs.cancer$log2FoldChange >= 1.5, ]), caption = "Dys.vs.Cancer-Upregulated")
DT::datatable(data.frame(results_dys.vs.cancer[results_dys.vs.cancer$padj<=.05 & results_dys.vs.cancer$log2FoldChange <= -1.5, ]), caption = "Dys.vs.Cancer-Downregulated")
topgenes <- unlist(dys.vs.control_marker_list)
heatdata <- log2(exprs(dys.vs.cancer)[topgenes,]+1)
heatdata <- t(scale(t(heatdata)))
heatmapkey <- paste("Scaled", "counts", sep = "\n")
condition <- 'Class'
hc.row <- hcopt(as.dist(1-cor(t(heatdata))),method="ward.D")
annot_col <- pData(dys.vs.cancer)[,condition, drop=FALSE]
pheatmap::pheatmap(heatdata, cluster_rows = hc.row, show_rownames = FALSE, color = colorRampPalette(c("blue","white","red"))(100), annotation_col = annot_col)
Reference level = HkNR
hknr.vs.cancer <- eSet_wo_infl[, which(pData(eSet_wo_infl)$Class %in% c('OSCC', 'HkNR'))]
colData <- data.frame(condition=hknr.vs.cancer$Class)
colData$sex <- hknr.vs.cancer$Sex
colData$smoke <- hknr.vs.cancer$imputed_smoking_label
#make DESeq2 expected format for diffex analysis
dds <- DESeq2::DESeqDataSetFromMatrix(countData=exprs(hknr.vs.cancer),
colData=colData,
design = ~sex+smoke+condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## factor levels were dropped which had no samples
dds$condition <- relevel(dds$condition, ref = 'HkNR')
#perform diffex
dds_results <- DESeq2::DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 488 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
#get the results in a nice tabular format
results_hknr.vs.cancer<- DESeq2::results(dds_results)
results_hknr.vs.cancer <- results_hknr.vs.cancer[order(results_hknr.vs.cancer$padj),]
#DT::datatable(as.data.frame(results_other.vs.control))
results_hknr.vs.cancer <- results_hknr.vs.cancer[!is.na(results_hknr.vs.cancer$padj),]
c("fdr<=0.05"=sum(results_hknr.vs.cancer$padj<=.05),
"fdr<=0.01"=sum(results_hknr.vs.cancer$padj<=.01),
"fdr<=0.001"=sum(results_hknr.vs.cancer$padj<=.001))
## fdr<=0.05 fdr<=0.01 fdr<=0.001
## 1863 818 280
hknr.vs.cancer_up <- rownames(results_hknr.vs.cancer[which((results_hknr.vs.cancer$padj <= fdr)&(results_hknr.vs.cancer$log2FoldChange> +max_fc)),])
hknr.vs.cancer_down <- rownames(results_hknr.vs.cancer[which((results_hknr.vs.cancer$padj <= fdr)&(results_hknr.vs.cancer$log2FoldChange < -min_fc)),])
hknr.vs.cancer_marker_list <- list(hknr.vs.cancer_up, hknr.vs.cancer_down)
DT::datatable(data.frame(results_hknr.vs.cancer[results_hknr.vs.cancer$padj<=.05 & results_hknr.vs.cancer$log2FoldChange >= 1.5, ]), caption = "HkNR.vs.Cancer-Upregulated")
DT::datatable(data.frame(results_hknr.vs.cancer[results_hknr.vs.cancer$padj<=.05 & results_hknr.vs.cancer$log2FoldChange <= -1.5, ]), caption = "HkNR.vs.Cancer-Downregulated")
topgenes <- unlist(hknr.vs.control_marker_list)
heatdata <- log2(exprs(hknr.vs.cancer)[topgenes,]+1)
heatdata <- t(scale(t(heatdata)))
heatmapkey <- paste("Scaled", "counts", sep = "\n")
condition <- 'Class'
hc.row <- hcopt(as.dist(1-cor(t(heatdata))),method="ward.D")
annot_col <- pData(hknr.vs.cancer)[,condition, drop=FALSE]
pheatmap::pheatmap(heatdata, cluster_rows = hc.row, show_rownames = FALSE, color = colorRampPalette(c("blue","white","red"))(100), annotation_col = annot_col)
Reference level = HkNR
hknr.vs.dys <- eSet_wo_infl[, which(pData(eSet_wo_infl)$Class %in% c('HkNR', 'Dysplasia'))]
colData <- data.frame(condition=hknr.vs.dys$Class)
colData$sex <- as.character(hknr.vs.dys$Sex)
colData$smoke <- hknr.vs.dys$imputed_smoking_label
#make DESeq2 expected format for diffex analysis
dds <- DESeq2::DESeqDataSetFromMatrix(countData=exprs(hknr.vs.dys),
colData=colData,
design = ~sex+smoke+condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## factor levels were dropped which had no samples
dds$condition <- relevel(dds$condition, ref = 'HkNR')
#perform diffex
dds_results <- DESeq2::DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 195 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
#get the results in a nice tabular format
results_hknr.vs.dys<- DESeq2::results(dds_results)
results_hknr.vs.dys <- results_hknr.vs.dys[order(results_hknr.vs.dys$padj),]
#DT::datatable(as.data.frame(results_other.vs.control))
results_hknr.vs.dys <- results_hknr.vs.dys[!is.na(results_hknr.vs.dys$padj),]
c("fdr<=0.05"=sum(results_hknr.vs.dys$padj<=.05),
"fdr<=0.01"=sum(results_hknr.vs.dys$padj<=.01),
"fdr<=0.001"=sum(results_hknr.vs.dys$padj<=.001))
## fdr<=0.05 fdr<=0.01 fdr<=0.001
## 388 78 19
hknr.vs.dys_up <- rownames(results_hknr.vs.dys[which((results_hknr.vs.dys$padj <= fdr)&(results_hknr.vs.dys$log2FoldChange> +max_fc)),])
hknr.vs.dys_down <- rownames(results_hknr.vs.dys[which((results_hknr.vs.dys$padj <= fdr)&(results_hknr.vs.dys$log2FoldChange < -min_fc)),])
hknr.vs.dys_marker_list <- list(hknr.vs.dys_up, hknr.vs.dys_down)
DT::datatable(data.frame(results_hknr.vs.dys[results_hknr.vs.dys$padj<=.05 & results_hknr.vs.dys$log2FoldChange >= 1.5, ]), caption = "HkNR.vs.Dys-Upregulated")
DT::datatable(data.frame(results_hknr.vs.dys[results_hknr.vs.dys$padj<=.05 & results_hknr.vs.dys$log2FoldChange <= -1.5, ]), caption = "HkNR.vs.Dys-Downregulated")
topgenes <- unlist(hknr.vs.dys_marker_list)
heatdata <- log2(exprs(hknr.vs.dys)[topgenes,]+1)
heatdata <- t(scale(t(heatdata)))
heatmapkey <- paste("Scaled", "counts", sep = "\n")
condition <- 'Class'
hc.row <- hcopt(as.dist(1-cor(t(heatdata))),method="ward.D")
annot_col <- pData(hknr.vs.dys)[,condition, drop=FALSE]
pheatmap::pheatmap(heatdata, cluster_rows = hc.row, show_rownames = FALSE, color = colorRampPalette(c("blue","white","red"))(100), annotation_col = annot_col)
num_marker2 <- c("fdr<=0.05"=sum(results_cancer.vs.control$padj<=.05),
"fdr<=0.01"=sum(results_cancer.vs.control$padj<=.01),
"fdr<=0.001"=sum(results_cancer.vs.control$padj<=.001),
"up reg"= length(cancer.vs.control_up),
"down reg"= length(cancer.vs.control_down),
"fdr<=0.05"=sum(results_dys.vs.control$padj<=.05),
"fdr<=0.01"=sum(results_dys.vs.control$padj<=.01),
"fdr<=0.001"=sum(results_dys.vs.control$padj<=.001),
"up reg"= length(dys.vs.control_up),
"down reg"= length(dys.vs.control_down),
"fdr<=0.05"=sum(results_hknr.vs.control$padj<=.05),
"fdr<=0.01"=sum(results_hknr.vs.control$padj<=.01),
"fdr<=0.001"=sum(results_hknr.vs.control$padj<=.001),
"up reg"= length(hknr.vs.control_up),
"down reg"= length(hknr.vs.control_down),
"fdr<=0.05"=sum(results_dys.vs.cancer$padj<=.05),
"fdr<=0.01"=sum(results_dys.vs.cancer$padj<=.01),
"fdr<=0.001"=sum(results_dys.vs.cancer$padj<=.001),
"up reg"= length(dys.vs.cancer_up),
"down reg"= length(dys.vs.cancer_down),
"fdr<=0.05"=sum(results_hknr.vs.cancer$padj<=.05),
"fdr<=0.01"=sum(results_hknr.vs.cancer$padj<=.01),
"fdr<=0.001"=sum(results_hknr.vs.cancer$padj<=.001),
"up reg"= length(hknr.vs.cancer_up),
"down reg"= length(hknr.vs.cancer_down),
"fdr<=0.05"=sum(results_hknr.vs.dys$padj<=.05),
"fdr<=0.01"=sum(results_hknr.vs.dys$padj<=.01),
"fdr<=0.001"=sum(results_hknr.vs.dys$padj<=.001),
"up reg"= length(hknr.vs.dys_up),
"down reg"= length(hknr.vs.dys_down)
)
df2 <- data.frame(matrix(t(num_marker2), nrow = 6, ncol = 5, byrow = TRUE))
names(df2)<- c("fdr<=0.05", "fdr<=0.01", "fdr<=0.001", "up reg", "down reg")
rownames(df2)<- c("Cancer vs. Control", "Dysplasia vs. Control", "HkNR vs. Control", "Dysplasia vs. Cancer", "HkNR vs. Cancer", "HkNR vs. Dysplasia")
DT::datatable(as.data.frame(df2))
pheatmap::pheatmap(df2, cluster_rows = FALSE, cluster_cols = FALSE)
library(xlsx)
#Pairwise results
xlsx::write.xlsx(x=results_cancer.vs.control, file = file.path(PATH, "results/PML.DiffEx.results.Can.vs.Ctrl.xlsx"), row.names = TRUE)
xlsx::write.xlsx(x=results_dys.vs.control, file = file.path(PATH, "results/PML.DiffEx.results.Dys.vs.Ctrl.xlsx"), row.names = TRUE)
xlsx::write.xlsx(x=results_hknr.vs.control, file.path(PATH, "results/PML.DiffEx.results.Hknr.vs.Ctrl.xlsx"),row.names = TRUE)
xlsx::write.xlsx(x=results_dys.vs.cancer, file.path(PATH, "results/PML.DiffEx.results.Dys.vs.Can.xlsx"),row.names = TRUE)
xlsx::write.xlsx(x=results_hknr.vs.cancer, file.path(PATH, "results/PML.DiffEx.results.Hknr.vs.Can.xlsx"),row.names = TRUE)
xlsx::write.xlsx(x=results_hknr.vs.dys, file.path(PATH, "results/PML.DiffEx.results.Hknr.vs.Dys.xlsx"),row.names = TRUE)
Store diffanal signatures to be used in GSEA analysis.
cancer.vs.control_sign <- list(up= cancer.vs.control_marker_list[[1]],
down=cancer.vs.control_marker_list[[2]])
dys.vs.control_sign <- list(up=dys.vs.control_marker_list[[1]],
down=dys.vs.control_marker_list[[2]])
hknr.vs.control_sign <- list(up=hknr.vs.control_marker_list[[1]],
down=hknr.vs.control_marker_list[[2]])
dys.vs.cancer_sign <- list(up=dys.vs.cancer_marker_list[[1]],
down=dys.vs.cancer_marker_list[[2]])
hknr.vs.cancer_sign <- list(up=hknr.vs.cancer_marker_list[[1]],
down=hknr.vs.cancer_marker_list[[2]])
hknr.vs.dys_sign <- list(up=hknr.vs.dys_marker_list[[1]],
down=hknr.vs.dys_marker_list[[2]])
#with control
signatures1 <- list("cancer.vs.control_up"=cancer.vs.control_marker_list[[1]],
"cancer.vs.control_down"= cancer.vs.control_marker_list[[2]],
"dys.vs.control_up"=dys.vs.control_marker_list[[1]],
"dys.vs.control_down"=dys.vs.control_marker_list[[2]],
"hknr.vs.control_up" =hknr.vs.control_marker_list[[1]],
"hknr.vs.control_down"=hknr.vs.control_marker_list[[2]]
)
#with cancer
signatures2 <- list("dys.vs.cancer_up"=dys.vs.cancer_marker_list[[1]],
"dys.vs.cancer_down"=dys.vs.cancer_marker_list[[2]],
"hknr.vs.cancer_up"=hknr.vs.cancer_marker_list[[1]],
"hknr.vs.cancer_down" =hknr.vs.cancer_marker_list[[2]]
)
#pairwise(infl, hknr, dys)
signatures3 <- list("hknr.vs.dys_up"=hknr.vs.dys_marker_list[[1]],
"hknr.vs.dys_down"=hknr.vs.dys_marker_list[[2]]
)
list_signs_wo_infl <- list(signatures1, signatures2, signatures3)
#saveRDS(list_signs_wo_infl, file = file.path(PATH, "results/06_22_pml_signatures_w_sex_smoke_logFC1.5_fdr0.05.RDS"))